import numpy as np
import tensorflow as tf
from tensorflow import keras
# Display
from IPython.display import Image, display
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
In the field of computer vision, Convolutional Neural Networks (CNNs) are powerful tools for image classification tasks. However, understanding which regions of an image the network uses to make its predictions can be difficult. This is where Gradient-weighted Class Activation Mapping (GRAD-CAM) comes in. In this tutorial, I will explore how to apply GRAD-CAM to a pre-trained CNN model and then apply it to the data from my previous CNN exercise to gain insights into the model's behavior.
GRAD-CAM (Gradient-weighted Class Activation Mapping) is a technique used to visualize which parts of an image a Convolutional Neural Network (CNN) is focusing on to make its predictions. It does this by analyzing the gradient information flowing into the final convolutional layer of the network, and uses this information to generate a heatmap indicating which regions of the image are most important for the network's decision. This technique provides valuable insights into the behavior of CNNs, allowing for better understanding and optimization of these models.
For this notebook I will first follow the tutorial on GRAD-CAM on https://keras.io/examples/vision/grad_cam/. Then I will try with some other images found online. Finally I will apply the model to the previous CNN dataset.
The data in the previous CNN dataset contains images of dogs and cats.
# Xception is the pre-trained network used throughout this notebook; the
# preprocessing and decoding helpers below come from the same Keras module.
model_builder = keras.applications.xception.Xception
preprocess_input = keras.applications.xception.preprocess_input
decode_predictions = keras.applications.xception.decode_predictions

# Input resolution used for every image passed to the model.
img_size = (299, 299)

# Name of the layer whose activations Grad-CAM is computed against.
last_conv_layer_name = "block14_sepconv2_act"

# Download (or reuse the cached copy of) the sample image and show it.
img_path = keras.utils.get_file(
    fname="african_elephant.jpg",
    origin="https://i.imgur.com/Bvro0YD.png",
)
display(Image(img_path))
def get_img_array(img_path, size):
    """Load an image file and return it as a batch of one.

    Args:
        img_path: Path of the image file to load.
        size: Target size passed to ``load_img`` as ``target_size``.

    Returns:
        A NumPy array with a leading batch axis of length 1 added in front
        of the array produced by ``img_to_array``.
    """
    # Load the file as a PIL image resized to `size`.
    pil_img = keras.preprocessing.image.load_img(img_path, target_size=size)
    # Convert the PIL image to a NumPy pixel array.
    pixels = keras.preprocessing.image.img_to_array(pil_img)
    # Prepend the batch axis so the model receives a batch of one image.
    return pixels[np.newaxis, ...]
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for one class of a Keras model.

    Args:
        img_array: Preprocessed input batch. The heatmap is built from the
            first item only, so this is intended to be a batch of one.
        model: Keras model to explain.
        last_conv_layer_name: Name of the layer whose output feature map is
            weighted to form the heatmap.
        pred_index: Index of the class to explain. When ``None`` the class
            with the highest score for the first item is used.

    Returns:
        A 2-D NumPy array with values in ``[0, 1]``. If the class has no
        positive evidence anywhere the result is all zeros (the original
        normalisation produced NaN in that case).
    """
    # Model mapping the input image to both the chosen layer's activations
    # and the final predictions. `model.inputs` is already a list, so it is
    # passed as-is rather than wrapped in another list.
    grad_model = tf.keras.models.Model(
        model.inputs,
        [model.get_layer(last_conv_layer_name).output, model.output],
    )

    # Record the forward pass so the class score can be differentiated with
    # respect to the layer activations.
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the selected class score with regard to the feature map.
    grads = tape.gradient(class_channel, last_conv_layer_output)

    # One weight per channel: the mean gradient over batch and spatial axes.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight every channel of the first item's feature map by its importance
    # and sum over channels to obtain the class activation map.
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep only positive evidence, then scale to [0, 1]. divide_no_nan
    # returns 0 instead of NaN when the maximum is 0.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()
# Build the ImageNet-pretrained model.
model = model_builder(weights="imagenet")

# Drop the final softmax so predictions (and gradients) are raw class scores.
model.layers[-1].activation = None

# Load the image as a batch of one and apply the Xception preprocessing.
img_array = preprocess_input(get_img_array(img_path, size=img_size))

# Report the highest-scoring class.
preds = model.predict(img_array)
print("Predicted:", decode_predictions(preds, top=1)[0])

# Grad-CAM heatmap for the top predicted class (pred_index left at default).
heatmap = make_gradcam_heatmap(
    img_array=img_array,
    model=model,
    last_conv_layer_name=last_conv_layer_name,
)

# Show the raw heatmap as a matrix plot.
plt.matshow(heatmap)
plt.show()
1/1 [==============================] - 1s 570ms/step
Predicted: [('n02504458', 'African_elephant', 9.862388)]
def save_and_display_gradcam(img_path, heatmap, cam_path="cam.jpg", alpha=0.4):
    """Overlay a Grad-CAM heatmap on an image file, save it and display it.

    Args:
        img_path: Path of the original image file.
        heatmap: 2-D array of values in ``[0, 1]`` (as returned by
            ``make_gradcam_heatmap``).
        cam_path: Where the superimposed image is written.
        alpha: Weight applied to the colourised heatmap before it is added
            to the original image.
    """
    # Load the original image at its native size as a pixel array.
    img = keras.preprocessing.image.load_img(img_path)
    img = keras.preprocessing.image.img_to_array(img)

    # Rescale the heatmap to integer colormap indices in 0-255.
    heatmap = np.uint8(255 * heatmap)

    # Look up the "jet" colormap. `matplotlib.cm.get_cmap` was deprecated and
    # then removed from Matplotlib, so the pyplot accessor is used instead.
    jet = plt.get_cmap("jet")

    # RGB values (alpha channel dropped) for each of the 256 indices.
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]

    # Turn the colourised heatmap into an image and resize it to the original
    # image's dimensions (PIL expects (width, height)).
    jet_heatmap = keras.preprocessing.image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
    jet_heatmap = keras.preprocessing.image.img_to_array(jet_heatmap)

    # Superimpose the heatmap on the original image.
    superimposed_img = jet_heatmap * alpha + img
    superimposed_img = keras.preprocessing.image.array_to_img(superimposed_img)

    # Save the result, then display the saved file.
    superimposed_img.save(cam_path)
    display(Image(cam_path))
# Overlay the heatmap on the image and show the result.
save_and_display_gradcam(img_path=img_path, heatmap=heatmap)
Trying the model on an image with two pets (a dog and a cat).
# Download (or reuse the cached copy of) an image containing a dog and a cat.
img_path = keras.utils.get_file(
    fname="cat_and_dog.jpg",
    origin="https://storage.googleapis.com/petbacker/images/blog/2017/dog-and-cat-cover.jpg",
)
display(Image(img_path))

# Load the image as a batch of one and apply the Xception preprocessing.
img_array = preprocess_input(get_img_array(img_path, size=img_size))

# Report the two highest-scoring classes.
preds = model.predict(img_array)
print("Predicted:", decode_predictions(preds, top=2)[0])
1/1 [==============================] - 0s 83ms/step
Predicted: [('n02112137', 'chow', 4.611241), ('n02124075', 'Egyptian_cat', 4.3817363)]
1/1 [==============================] - 0s 83ms/step
Predicted: [('n02112137', 'chow', 4.611241), ('n02124075', 'Egyptian_cat', 4.3817363)]
# Class activation heatmap for "chow" (class index 260).
heatmap = make_gradcam_heatmap(
    img_array=img_array,
    model=model,
    last_conv_layer_name=last_conv_layer_name,
    pred_index=260,
)
save_and_display_gradcam(img_path=img_path, heatmap=heatmap)

# Class activation heatmap for "Egyptian cat" (class index 285).
heatmap = make_gradcam_heatmap(
    img_array=img_array,
    model=model,
    last_conv_layer_name=last_conv_layer_name,
    pred_index=285,
)
save_and_display_gradcam(img_path=img_path, heatmap=heatmap)
The heatmap on the image shows which areas were used to make the prediction. Changing the predicted class index changes the area the heatmap highlights.
To apply the above tutorial, I will use images found online to see how the model performs.
# Batch size and image size used when building the dataset.
BATCH_SIZE = 32
IMG_SIZE1 = (299, 299)

# Download and extract the cats-vs-dogs archive (cached by Keras).
_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
path_to_zip = tf.keras.utils.get_file(
    'cats_and_dogs.zip',
    origin=_URL,
    extract=True,
)

# The dataset folder is looked up next to the downloaded zip file.
PATH = os.path.join(os.path.dirname(path_to_zip), 'cats_and_dogs_filtered')
train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')

# Shuffled, batched dataset of training images resized to IMG_SIZE1.
train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    shuffle=True,
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE1,
)
Found 2000 files belonging to 2 classes.
# Bare expression: in a notebook cell this displays the dataset's repr.
train_dataset
<BatchDataset element_spec=(TensorSpec(shape=(None, 299, 299, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>
# Local image file (absolute path on the author's machine).
img_path = "C:/Users/maxwe/OneDrive - Office 365 Fontys/Semester 6 AI7/Class Exercises/pexels-laura-stanley-3090875.jpg"
display(Image(img_path))

# Load the image as a batch of one and apply the Xception preprocessing.
img_array = preprocess_input(get_img_array(img_path, size=img_size))

# Report the highest-scoring class.
preds = model.predict(img_array)
print("Predicted:", decode_predictions(preds, top=1)[0])
1/1 [==============================] - 0s 85ms/step
Predicted: [('n02085620', 'Chihuahua', 6.097115)]
1/1 [==============================] - 0s 85ms/step
Predicted: [('n02085620', 'Chihuahua', 6.097115)]
The model predicted Chihuahua; let's look at the heatmap.
# pred_index is left at its default (None), so the heatmap explains the
# highest-scoring class.
heatmap = make_gradcam_heatmap(
    img_array=img_array,
    model=model,
    last_conv_layer_name=last_conv_layer_name,
)
save_and_display_gradcam(img_path=img_path, heatmap=heatmap)
The heatmap covers the face of the dog quite well.
Next I try an image with two pets. In this example I obtain the predicted class index with the argmax function, and I set `top=2` to see the two top predictions.
# Local image file containing a dog and a cat (absolute path on the author's
# machine).
img_path = "C:/Users/maxwe/OneDrive - Office 365 Fontys/Semester 6 AI7/Class Exercises/Can-cats-and-dogs.jpg"
display(Image(img_path))

# Load the image as a batch of one and apply the Xception preprocessing.
img_array = preprocess_input(get_img_array(img_path, size=img_size))

# Report the two highest-scoring classes.
preds = model.predict(img_array)
print("Predicted:", decode_predictions(preds, top=2)[0])
1/1 [==============================] - 0s 87ms/step
Predicted: [('n02106662', 'German_shepherd', 6.440115), ('n02123045', 'tabby', 4.8576)]
1/1 [==============================] - 0s 87ms/step
Predicted: [('n02106662', 'German_shepherd', 6.440115), ('n02123045', 'tabby', 4.8576)]
The model predicted right again.
# Index of the highest-scoring class for the single image in the batch.
pred_index = preds[0].argmax()
# Bare expression: in a notebook cell this displays the index.
pred_index
235
The best prediction from the model is used to show the heatmap, using the pred_index variable above.
# Heatmaps for the two highest-scoring classes of this image, best first.
# The indices are taken from `preds` rather than hard-coded: the literal 235
# duplicated `pred_index`, and the literal 285 is the index this notebook
# labels "Egyptian cat", not the "tabby" class that was actually predicted.
top_two_indices = np.argsort(preds[0])[::-1][:2]
for class_index in top_two_indices:
    heatmap = make_gradcam_heatmap(
        img_array,
        model,
        last_conv_layer_name,
        pred_index=int(class_index),
    )
    save_and_display_gradcam(img_path, heatmap)
The heatmaps show the points of interest for the German Shepherd and the tabby cat.
I will now apply the GRAD-CAM model to the previous CNN exercise data of dogs and cat images. To begin I will try with one image.
# Pull a single batch from the dataset and keep its first image.
image, _ = next(iter(train_dataset.take(1)))
first_image = image[0]
# Bare expression: in a notebook cell this displays the image tensor.
first_image
<tf.Tensor: shape=(299, 299, 3), dtype=float32, numpy=
array([[[141.7279 , 142.69127 , 146.5809 ],
[124.49977 , 125.44459 , 129.33421 ],
[135.87744 , 135.4986 , 140.14395 ],
...,
[142.9397 , 128.7024 , 140.90312 ],
[156.68945 , 147.01085 , 170.56854 ],
[158.0948 , 148.4259 , 172.03961 ]],
[[119.06954 , 119.1302 , 120.90901 ],
[115.24483 , 114.91373 , 116.74818 ],
[105.70309 , 104.804054, 108.56763 ],
...,
[137.63086 , 126.82352 , 138.50116 ],
[150.46806 , 147.5936 , 162.92555 ],
[150.92033 , 148.07921 , 163.42369 ]],
[[111.21662 , 109.15154 , 108.80448 ],
[117.38404 , 114.8322 , 115.28036 ],
[ 90.60669 , 88.054855, 89.48864 ],
...,
[141.49039 , 135.15239 , 146.79514 ],
[133.8796 , 139.32265 , 143.30966 ],
[131.1235 , 136.51405 , 140.47893 ]],
...,
[[135.44305 , 98.67317 , 98.817764],
[124.64239 , 99.70928 , 95.6819 ],
[103.36113 , 92.862114, 82.86379 ],
...,
[144.79822 , 154.34175 , 115.25471 ],
[137.28058 , 130.44023 , 125.55704 ],
[150.89583 , 143.81401 , 139.05492 ]],
[[144.26514 , 109.449066, 108.94238 ],
[129.03874 , 104.105644, 100.08057 ],
[106.96862 , 95.526115, 86.93955 ],
...,
[142.07016 , 149.4972 , 118.24745 ],
[136.43942 , 127.65635 , 131.37805 ],
[149.6815 , 140.84706 , 144.68816 ]],
[[133.29909 , 100.55176 , 99.252266],
[122.29241 , 97.3593 , 93.33421 ],
[107.57475 , 96.01957 , 87.65836 ],
...,
[137.17021 , 144.3788 , 116.77431 ],
[129.48666 , 119.5997 , 128.26236 ],
[142.68097 , 132.73615 , 141.51544 ]]], dtype=float32)>
from PIL import Image
import numpy as np
# Cast the float pixel values to 8-bit integers and wrap them in a PIL image.
pixels_uint8 = np.asarray(first_image).astype(np.uint8)
img = Image.fromarray(pixels_uint8)
# Bare expression: in a notebook cell this displays the image.
img
def get_img_array2(img, size):
    """Resize an in-memory PIL image and return it as a batch of one.

    Args:
        img: PIL image to convert.
        size: Target size handed to ``img.resize``.

    Returns:
        A NumPy array with a leading batch axis of length 1 added in front
        of the array produced by ``img_to_array``.
    """
    # Resize first so the array matches the model's expected input size.
    resized = img.resize(size)
    # Convert the PIL image to a NumPy pixel array.
    pixels = keras.preprocessing.image.img_to_array(resized)
    # Prepend the batch axis so the model receives a batch of one image.
    return pixels[np.newaxis, ...]
# Convert the PIL image to a preprocessed batch of one.
img_array = preprocess_input(get_img_array2(img, size=IMG_SIZE1))

# Report the highest-scoring class.
preds = model.predict(img_array)
print("Predicted:", decode_predictions(preds, top=1)[0])
1/1 [==============================] - 0s 87ms/step
Predicted: [('n02093256', 'Staffordshire_bullterrier', 6.445829)]
1/1 [==============================] - 0s 87ms/step
Predicted: [('n02093256', 'Staffordshire_bullterrier', 6.445829)]
# Heatmap for the fixed class index 285 (labelled "Egyptian cat" earlier in
# this notebook), regardless of which class was predicted for this image.
heatmap = make_gradcam_heatmap(
    img_array=img_array,
    model=model,
    last_conv_layer_name=last_conv_layer_name,
    pred_index=285,
)
from PIL import Image
def save_and_display_gradcam2(img, heatmap, cam_path="cam.jpg", alpha=0.4):
    """Blend a Grad-CAM heatmap into an in-memory PIL image, save and show it.

    Args:
        img: PIL image the heatmap is blended with.
        heatmap: 2-D array of values in ``[0, 1]`` (as returned by
            ``make_gradcam_heatmap``).
        cam_path: Where the blended image is written.
        alpha: Blend factor passed to ``Image.blend``.
    """
    # Rescale the heatmap to integer colormap indices in 0-255.
    heatmap = np.uint8(255 * heatmap)

    # Look up the "jet" colormap. `matplotlib.cm.get_cmap` was deprecated and
    # then removed from Matplotlib, so the pyplot accessor is used instead.
    jet = plt.get_cmap("jet")

    # RGB values (alpha channel dropped) for each of the 256 indices.
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]

    # Turn the colourised heatmap into a PIL image the same size as `img`.
    jet_heatmap = keras.preprocessing.image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize(img.size)

    # Blend the heatmap into the original image.
    superimposed_img = Image.blend(img, jet_heatmap, alpha)

    # Save the result, then display the saved file.
    superimposed_img.save(cam_path)
    display(Image.open(cam_path))
# Blend the heatmap with the PIL image and show the result.
save_and_display_gradcam2(img=img, heatmap=heatmap)
I had to convert the images and update some functions to be able to use the previous dataset but it works fine on the above image. Now I will apply the model on 10 images in the dataset.
# Run Grad-CAM on the first image of each of ten batches, always explaining
# the fixed class index 285.
for images, labels in train_dataset.take(10):
    # First image of the batch, converted from a float tensor to a PIL image.
    img = Image.fromarray(np.uint8(np.squeeze(images[0])))
    img_array = preprocess_input(get_img_array2(img, size=IMG_SIZE1))

    # Report the highest-scoring class for this image.
    preds = model.predict(img_array)
    print("Predicted:", decode_predictions(preds, top=1)[0])

    heatmap = make_gradcam_heatmap(
        img_array=img_array,
        model=model,
        last_conv_layer_name=last_conv_layer_name,
        pred_index=285,
    )
    save_and_display_gradcam2(img=img, heatmap=heatmap)
1/1 [==============================] - 0s 105ms/step
Predicted: [('n02123597', 'Siamese_cat', 10.271257)]
1/1 [==============================] - 0s 84ms/step
Predicted: [('n02102480', 'Sussex_spaniel', 7.2099886)]
1/1 [==============================] - 0s 93ms/step
Predicted: [('n02106662', 'German_shepherd', 11.372015)]
1/1 [==============================] - 0s 87ms/step
Predicted: [('n02124075', 'Egyptian_cat', 9.844479)]
1/1 [==============================] - 0s 88ms/step
Predicted: [('n02093428', 'American_Staffordshire_terrier', 6.666926)]
1/1 [==============================] - 0s 108ms/step
Predicted: [('n02099601', 'golden_retriever', 9.476903)]
1/1 [==============================] - 0s 106ms/step
Predicted: [('n02109961', 'Eskimo_dog', 8.673337)]
1/1 [==============================] - 0s 87ms/step
Predicted: [('n02123159', 'tiger_cat', 6.5013437)]
1/1 [==============================] - 0s 89ms/step
Predicted: [('n02107312', 'miniature_pinscher', 5.8847747)]
1/1 [==============================] - 0s 94ms/step
Predicted: [('n02086910', 'papillon', 7.2941933)]
The model predicts the pets quite accurately. The heatmaps are not always on the face or main features of the pet. To see if I can improve this, I will pass the top prediction of each image as the predicted class index parameter.
# Run Grad-CAM on the first image of each of ten batches, this time
# explaining the class the model actually predicted for that image.
# NOTE: the dataset was created with shuffle=True, so these are not
# necessarily the same images as in the previous loop.
for images, labels in train_dataset.take(10):
    # First image of the batch, converted from a float tensor to a PIL image.
    img = Image.fromarray(np.uint8(np.squeeze(images[0])))
    img_array = preprocess_input(get_img_array2(img, size=IMG_SIZE1))

    # Report the highest-scoring class for this image.
    preds = model.predict(img_array)
    print("Predicted:", decode_predictions(preds, top=1)[0])

    # Determine the predicted class index and explain that class.
    pred_index = np.argmax(preds[0])
    heatmap = make_gradcam_heatmap(
        img_array=img_array,
        model=model,
        last_conv_layer_name=last_conv_layer_name,
        pred_index=pred_index,
    )
    save_and_display_gradcam2(img=img, heatmap=heatmap)
1/1 [==============================] - 0s 106ms/step
Predicted: [('n02094114', 'Norfolk_terrier', 6.608295)]
1/1 [==============================] - 0s 87ms/step
Predicted: [('n02123045', 'tabby', 6.7740345)]
1/1 [==============================] - 0s 87ms/step
Predicted: [('n02123597', 'Siamese_cat', 10.092904)]
1/1 [==============================] - 0s 90ms/step
Predicted: [('n02123045', 'tabby', 6.42336)]
1/1 [==============================] - 0s 109ms/step
Predicted: [('n02123045', 'tabby', 9.025244)]
1/1 [==============================] - 0s 84ms/step
Predicted: [('n02123045', 'tabby', 7.605784)]
1/1 [==============================] - 0s 85ms/step
Predicted: [('n02108551', 'Tibetan_mastiff', 6.6299224)]
1/1 [==============================] - 0s 89ms/step
Predicted: [('n02105162', 'malinois', 8.30565)]
1/1 [==============================] - 0s 90ms/step
Predicted: [('n02099712', 'Labrador_retriever', 8.45346)]
1/1 [==============================] - 0s 90ms/step
Predicted: [('n02123394', 'Persian_cat', 9.00205)]
The results seem to show the model is more focused on the faces or main features of the image. I think this above result seems to be the best result I can get so far.
# Inspect the image at index 13 of `images` (presumably the batch left over
# from the last iteration of the preceding loop), converted to a PIL image.
img = Image.fromarray(np.uint8(np.squeeze(images[13])))
img_array = preprocess_input(get_img_array2(img, size=IMG_SIZE1))

# Report the highest-scoring class.
preds = model.predict(img_array)
print("Predicted:", decode_predictions(preds, top=1)[0])

# Determine the predicted class index and explain that class.
pred_index = np.argmax(preds[0])
heatmap = make_gradcam_heatmap(
    img_array=img_array,
    model=model,
    last_conv_layer_name=last_conv_layer_name,
    pred_index=pred_index,
)
save_and_display_gradcam2(img=img, heatmap=heatmap)
1/1 [==============================] - 0s 87ms/step
Predicted: [('n03207941', 'dishwasher', 6.483304)]
The above image shows an incorrect prediction. The model was not able to recognise the cat. When there are multiple objects, the model can focus on the wrong object and try to predict that. Let's look at the top 2 predictions.
# Same image as before (index 13 of `images`), converted to a PIL image.
img = Image.fromarray(np.uint8(np.squeeze(images[13])))
img_array = preprocess_input(get_img_array2(img, size=IMG_SIZE1))

# Report the two highest-scoring classes.
preds = model.predict(img_array)
print("Predicted:", decode_predictions(preds, top=2)[0])

# The heatmap still explains only the single highest-scoring class.
pred_index = np.argmax(preds[0])
heatmap = make_gradcam_heatmap(
    img_array=img_array,
    model=model,
    last_conv_layer_name=last_conv_layer_name,
    pred_index=pred_index,
)
save_and_display_gradcam2(img=img, heatmap=heatmap)
1/1 [==============================] - 0s 86ms/step
Predicted: [('n03207941', 'dishwasher', 6.483304), ('n04004767', 'printer', 5.7793264)]
The second prediction is also wrong. I think this is because some features distract the model from objects that are sometimes obvious.
In this exercise, I have demonstrated the application of the GRAD-CAM model in Jupyter notebooks. First, I applied the model to a pre-trained CNN to gain an understanding of how it works. Then, I applied it to my own data from a previous CNN exercise, allowing me to visualize which regions of the image the model was using to make its predictions. I also explored some tuning techniques to improve the model's performance, and looked at some examples where the model was wrong. Overall, GRAD-CAM is a valuable tool for understanding the inner workings of a CNN, and can be used to improve model performance and gain insights into the decision-making process of deep neural networks.